
#### REPLICATION CODE FOR Company Towns: Single-Industry Dominance and Local Government Capacity

# See README for folder structure
# See CODEBOOK for variable descriptions/sources

# This file produces figures 1 and 2 in the main text

# Set up + load data ####

library(pacman)
p_load(dplyr, here, tidyr, stringr, did, ggplot2, scales, estimatr)

dat <- read.csv(here("county_data.csv"))

# Figure 1: Difference in Differences ####

# identify treated counties
treated <- c(17055, 17155, 18055, 21013, 21095, 21107, 21121, 21193, 21195,
             21235, 39127, 42063, 54019, 54045, 54081, 54109, 17027, 18021,
             18125, 42059, 54057, 54093)

# create panel version of data 
datb <- dat %>%
  # limit to counties with high or no coal industry
  filter(highest<.05|highest>.2) %>%
  # select and pivot to long: government employment
  select(fips_long, pct1930, firsthigh, county_name, state_name, 
         contains("gov")&ends_with("full"), contains("locgov_emp")) %>%
  pivot_longer(cols=c(-fips_long, -pct1930, -firsthigh, -county_name, -state_name), names_to="year", names_prefix="gov|_full|locgov_emp") %>%
  # select and pivot to long: population
  full_join(dat %>%
              filter(highest<.05|highest>.2) %>%
              select(fips_long,
                     contains("pop")&ends_with("full"), contains(c("Population", "pop197", "pop198", "pop199", "pop2")), -contains("lab")) %>%
              pivot_longer(cols=c(-fips_long), names_to="year", values_to="pop", names_prefix="pop|_full|Population_")
  ) %>%
  # limit to years with employment data 
  filter(year %in% c(1850, 1860, 1870, 1880, 1900, 1910, 1920, 1930, 1940, 
                     1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997,
                     2002, 2007)) %>%
  # recode gov employment variable: set to NA when population missing, and add 1 for logging
  mutate(value = case_when(!is.na(pop)~value+1)) %>%
  # set year to numeric
  mutate(year = as.numeric(year)) %>%
  # set first year of high coal employment to 0 if never high coal employment (for did package)
  mutate(firsthigh = case_when(!is.na(firsthigh)~firsthigh,
                               T~0)) %>%
  # indicator for post-treatment
  mutate(post = case_when(firsthigh==0~0,
                          year>firsthigh~1,
                          T~0)) %>%
  # limit to counties with never high coal employment or in treated group
  filter(firsthigh==0|fips_long %in% treated)

# calculate residualized government employment measure
  # regress logged employment on interaction of population, state, year, only among non-coal counties
mod1 <- lm(log(value)~log(pop)*state_name*factor(year), data=datb[!(datb$fips_long %in% treated),], na.action=na.exclude)
  # calculate difference between actual and predicted gov employment
datb$resid_gov <- log(datb$value) - predict(mod1, type="response", newdata = datb)

# run difference in differences model
mod1 <- att_gt(yname="resid_gov",
               tname = "year",
               idname = "fips_long",
               gname = "firsthigh",
               data=datb %>% drop_na(pop) %>% mutate(fips_long = as.numeric(fips_long)),
               weightsname = "pop",
               control = "nevertreated",
               allow_unbalanced_panel = T)

# aggregate across cohorts
did_out <- aggte(mod1, type = "dynamic", 
                 na.rm=T,
                 max_e = 77,
                 min_e=-20)

# plot
ggdid(did_out) + theme_bw() + 
  xlab("Years pre/post-treatment") + 
  ylab("Diff in diffs: residualized gov emp.") + 
  scale_color_manual(name="Time",
                     label=c("Pre-treat",
                             "Post-treat"),
                     values=c("grey", "black")) +
  geom_hline(yintercept=0, linetype=3) + 
  theme(text = element_text(size=20)) + 
  scale_x_continuous(breaks=c(seq(-20, 80, 10)))


# aggregate just over first 40 years post-treatment for estimate in text

mod2 <- att_gt(yname="resid_gov",
               tname = "year",
               idname = "fips_long",
               gname = "firsthigh",
               data=datb %>% drop_na(pop) %>% filter((year - firsthigh)<=40|(year - firsthigh)>1000) %>% mutate(fips_long = as.numeric(fips_long)),
               weightsname = "pop",
               control = "nevertreated",
               allow_unbalanced_panel = T)
summary(aggte(mod2, type = "simple", na.rm=T))

# clean up environment
rm(datb, did_out, mod1, mod2, treated)

# Figure 2: Instrumental Variables ####

### prepare panel data

# transform spending variables from per-capita to total
dat$spend1957 <- dat$Total.Expenditure_PC_1957*dat$Population_1957
dat$spend1972 <- dat$Total.Expenditure_PC_1972*dat$pop1972
dat$spend1977 <- dat$Total.Expenditure_PC_1977*dat$pop1977
dat$spend1982 <- dat$Total.Expenditure_PC_1982*dat$pop1982
dat$spend1992 <- dat$Total.Expenditure_PC_1992*dat$pop1992

# transform to panel 
datb <- dat %>%
  # gather and pivot employment variables 
  select(fips_long, county_name, state_name, 
         contains("gov")&ends_with("full"), contains("locgov_emp")) %>%
  pivot_longer(cols=c(-fips_long, -county_name, -state_name), names_to="year", values_to="gov", names_prefix="gov|_full|locgov_emp") %>%
  # gather and pivot population variables
  full_join(dat %>%
              select(fips_long,
                     contains("pop")&ends_with("full"), contains(c("Population", "pop197", "pop198", "pop199", "pop2")), -contains("lab")) %>%
              pivot_longer(cols=c(-fips_long), names_to="year", values_to="pop", names_prefix="pop|_full|Population_")
  ) %>%
  # gather and pivot coal variables
  full_join(dat %>%
              select(fips_long,
                     contains("pct1")) %>%
              pivot_longer(cols=c(-fips_long), names_to="year", values_to="pct", names_prefix="pct")
  ) %>%
  # gather and pivot property tax variables
  full_join(dat %>%
              select(fips_long,
                     contains("taxlocal"), contains("proptax"), -contains("percap")) %>%
              pivot_longer(cols=c(-fips_long), names_to="year", values_to="revenue", names_prefix="taxlocal|proptax") %>%
              # adjust years to closest census year
              mutate(year = case_when(year=="1902"~"1900",
                                      year=="1912"~"1910",
                                      year=="1922"~"1920", 
                                      year=="1931"~"1930",
                                      T~year))
  ) %>%
  # gather and pivot spending variables
  full_join(dat %>%
              select(fips_long,
                     contains("spend")) %>%
              pivot_longer(cols=c(-fips_long), names_to="year", values_to="spend", names_prefix="spend")
  ) %>%
  # gather and pivot property value variables
  full_join(dat %>%
              select(fips_long,
                     contains("pval"), contains("Gross_AV_Total_")) %>%
              pivot_longer(cols=c(-fips_long), names_to="year", values_to="pval", names_prefix="pval|Gross_AV_Total_") %>%
              # adjust years to closest census year
              mutate(year = case_when(year=="1902"~"1900",
                                      year=="1912"~"1910",
                                      year=="1922"~"1920", 
                                      year=="1931"~"1930",
                                      T~year)) %>%
              filter(year!=12) %>%
              mutate(pval = case_when(year>1940~pval*1000, 
                                      T~pval))
  ) %>%
  # pull in time-invariant vars: 1920 coal and coal tons
  left_join(dat %>% select(fips_long, pct1920, coal_tons)) %>%
  mutate(year = as.numeric(year)) %>%
  # rescale coal employment to range from 0 to 1
  mutate(pct = rescale(pct, to=c(0,1))) %>%
  mutate(pct1920 = rescale(pct1920, to=c(0,1)))


### run models

# EMPLOYMENT
  # Pre-1950
    # OLS
mod1 <- lm_robust(log(gov+1)~pct+log(pop), data=datb %>% filter(year<1950), fixed_effects=year+state_name, clusters=fips_long, se_type="stata", weights=pop)
res <- data.frame("Outcome"="Employment",
                  "Period"="Pre-1950",
                  "Model"="OLS", 
                  "Estimate"=mod1$coefficients[1],
                  "ci_lo"=mod1$conf.low[1],
                  "ci_hi"=mod1$conf.high[1])
    # IV 
mod1 <- iv_robust(log(gov+1)~pct+log(pop)|coal_tons+log(pop), data=datb %>% filter(year<1950), fixed_effects=year, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Employment",
                                 "Period"="Pre-1950",
                                 "Model"="IV", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))
  # Post-1950
    # OLS
mod1 <- lm_robust(log(gov+1)~pct1920+log(pop), data=datb %>% filter(year>1950), fixed_effects=year+state_name, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Employment",
                                 "Period"="Post-1950",
                                 "Model"="OLS", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))
    # IV
mod1 <- iv_robust(log(gov+1)~pct1920+log(pop)|coal_tons+log(pop), data=datb %>% filter(year>1950), fixed_effects=year, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Employment",
                                 "Period"="Post-1950",
                                 "Model"="IV", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))

# REVENUE
  # Pre-1950
    # OLS
mod1 <- lm_robust(log(revenue+1)~pct+log(pop), data=datb %>% filter(year<1950), fixed_effects=year+state_name, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Revenue",
                                 "Period"="Pre-1950",
                                 "Model"="OLS", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))
    # IV
mod1 <- iv_robust(log(revenue+1)~pct+log(pop)|coal_tons+log(pop), data=datb %>% filter(year<1950), fixed_effects=year, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Revenue",
                                 "Period"="Pre-1950",
                                 "Model"="IV", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))
  # Post-1950
    # OLS
mod1 <- lm_robust(log(revenue+1)~pct1920+log(pop), data=datb %>% filter(year>1950), fixed_effects=year+state_name, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Revenue",
                                 "Period"="Post-1950",
                                 "Model"="OLS", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))
    # IV
mod1 <- iv_robust(log(revenue+1)~pct1920+log(pop)|coal_tons+log(pop), data=datb %>% filter(year>1950), fixed_effects=year, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Revenue",
                                 "Period"="Post-1950",
                                 "Model"="IV", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))

# SPENDING
  # Post-1950
    # OLS
mod1 <- lm_robust(log(spend+1)~pct1920+log(pop), data=datb %>% filter(year>1950), fixed_effects=year+state_name, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Spending",
                                 "Period"="Post-1950",
                                 "Model"="OLS", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))
    # IV
mod1 <- iv_robust(log(spend+1)~pct1920+log(pop)|coal_tons+log(pop), data=datb %>% filter(year>1950), fixed_effects=year, clusters=fips_long, se_type="stata", weights=pop)
res <- bind_rows(res, data.frame("Outcome"="Spending",
                                 "Period"="Post-1950",
                                 "Model"="IV", 
                                 "Estimate"=mod1$coefficients[1],
                                 "ci_lo"=mod1$conf.low[1],
                                 "ci_hi"=mod1$conf.high[1]))

# set dodge width
dodge <- position_dodge(width=.2)
# plot results
res %>%
  mutate(Period = factor(Period, levels=c("Pre-1950", "Post-1950"), ordered=TRUE)) %>%
  ggplot() +
  geom_point(aes(x=Outcome, y=Estimate, color=Model), position=dodge) +
  geom_errorbar(aes(x=Outcome, ymin=ci_lo, ymax=ci_hi, color=Model), position=dodge) +
  facet_wrap(~Period, scales="free_x") + theme_bw() +
  theme(text=element_text(size=15), plot.title = element_text(hjust = 0.5)) +
  geom_hline(yintercept=0, linetype=2) + 
  scale_color_manual(name="Model", values=c("black", "gray70"))


# clean up objects 
rm(dodge, mod1, res, datb)
